
# 2.1.4 实战：中国大学排名定向爬取

import requests
from bs4 import BeautifulSoup
import bs4

# Ranking page to scrape, and a browser-like User-Agent sent with the request.
url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}

# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.get(url, headers=headers, timeout=10)
soup = BeautifulSoup(res.content.decode(), 'html.parser')
tbody = soup('tbody')  # calling the soup object is shorthand for find_all('tbody')

# One [rank, school, total score] entry per usable table row.
result = []
if tbody:
    for tr in tbody[0].children:
        # Children of <tbody> also include whitespace strings and comments,
        # which have no child nodes; only element nodes can be table rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        # Index over element children only, so that whitespace between cells
        # cannot shift the column positions.
        cells = [cell for cell in tr.children if isinstance(cell, bs4.element.Tag)]
        # Columns 0, 1 and 3 are read below; skip rows that are too short to
        # avoid an IndexError when printing.
        if len(cells) < 4:
            continue
        result.append([cells[0].string, cells[1].string, cells[3].string])

# Print a header line followed by one line per school.
print("%-10s %20s %s" % ('排名', '学校', '总分'))
for rank, school, score in result:
    print("%-10s %20s %s" % (rank, school, score))
